# This file contains the S-plus code that was used to implement the estimators and functionals in "Variable bandwidths for nonparametric hazard rate estimation", Communications in Statistics - Theory and Methods, (38): 1055-1078, (2009) # As a fully working example the code herein can reproduce Figure 5 of the paper (the relevant data are also given below). # The same functions (with different input data) can be used in reproducing other figures of the same paper # More general use of the estimates is also feasible by supplying the corresponding data set. Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function. #N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible. igaussian <- function(x) { ifelse(x < -4, 0, ifelse(x> 4, 1, pnorm(x, 0, 1)))}#, ifelse(abs(x) > 4, 0, 0)) } "gaussian1"<- function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1)))}#, ifelse(abs(x) > 4, 0, 0)) #ifelse(abs(x)< 4, dnorm(x,0,1), ifelse(abs(x)>4, 0,0)) #} gaussian2 <- function(x) { ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1)))} "HigherOrder"<-function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1) * (3-x^2)/2))}#, ifelse(abs(x) > 4, 0, 0)) } f<-function(x) { 2/3 * dnorm(x,3,1)+1/3*dnorm(x,2,.2) } Intf<-function(x) { 2/3 * pnorm(x,3,1)+1/3*pnorm(x,2,.2) } NormHazard<-function(x) { f(x)/(1-Intf(x)) } "Thre1"<- #Watson - Leadbetter hazard rate (see Phd) function(xin, xout, kfun) { n<- length(xin) n1<-1:n xin.use<-sort(xin) h<-0.23*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile. arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2)* 1/h arg3 } "Thre11"<- #Watson - Leadbetter hazard rate (see Phd) function(xin, xout, kfun) { n<- length(xin) n1<-1:n xin.use<-sort(xin) h<-0.16*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile. arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2)* 1/h arg3 } "iThre1"<- #Watson - Leadbetter integrated hazard rate (cum. haz. est.) function(xin, xout, kfun) { n<- length(xin) n1<-1:n xin.use<-sort(xin) h<-0.23*diff(quantile(xin.use, c(.25, .75))) arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2) arg3 } "SimpsonInt"<- function(xin, h) { #xin = data, h=distance of the points at which function is evaluated n<-length(xin) nn<-1:n int0<-xin[1] int2n<-xin[n] inteven<-xin[seq(3,n-1,by=2)] intodd<-xin[seq(2,n-2,by=2)] sumodd<-sum(intodd) sumeven<-sum(inteven) res<-(h/3)*(int0+4*sumodd+2*sumeven+int2n) res } "ikde"<- function(xin, xout, kfun) { n<- length(xin) h<-0.23*diff(quantile(xin, c(.25, .75))) arg1<-(sapply(xout, "-", xin))/h arg2<-kfun(arg1) arg3<-colSums(arg2) arg3/n } "Thre2"<- #Transformed hazard rate estimator (2nd stage) using nonparametric transformation -log(1-\hat F(x)) function(xin, xout, kfun) { n<-length(xin) n1<-1:n nn<-length(xout) xin.use<-sort(xin) GofX<- -logb(1- ikde(xin.use, xout, igaussian), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xout, igaussian) Tsample<- -logb(1- ikde(xin.use, xin.use, igaussian), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xin.use, igaussian) hVector<-seq(0.01, 1, by=.01) #seq(0.01, 10, by=.01) LengthhVec<-length(hVector) TransEstimate<-sapply(1:LengthhVec, function(i, xin.use, xout, kfun, Tsample, n, n1, GofX, hVector) { h2<-hVector[i]*diff(quantile(xin.use, c(.25, .75))) #.5 GofXdashed<- Thre1(xin.use, xout, kfun) #NormHazard(xout) # arg1<-(sapply(GofX, "-", Tsample))/h2 arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2) arg3*GofXdashed/h2 } ,xin.use, xout, kfun, Tsample, n, n1, GofX, hVector) real.curve <-matrix(NormHazard(xout), nrow=nrow(TransEstimate), ncol=ncol(TransEstimate)) MSE<-(TransEstimate - real.curve)^2 dist<-xout[2]-xout[1] IMSE<-sapply(1:ncol(MSE), function(i, MSE, dist) SimpsonInt(MSE[,i], dist), MSE, dist) index<-order(IMSE)[1] TransEstimate[,index] } "VarHre"<- #Adaptive variable bandwidth hazard rate estimator function(xin, xout, h2, kfun=Epanechnikov) { #Simple kernel function xin: data, xout: points at which we evaluate the est. #h: bandwidth default kernel: biweight n<- length(xin) nn<-length(xout) n1<-1:n xin.use<-sort(xin) kernel<-Thre1(xin.use, xin.use, kfun) #Pilot estimate (W-L) SqrtKernel<-sqrt(kernel) vhre<-sapply(1:nn, function(i, xin.use, xout, h2, kfun, n, n1, SqrtKernel) { sum( (SqrtKernel * kfun( ((xout[i]-xin.use)/h2) * SqrtKernel) )/(n-n1+1)) }, xin.use, xout, h2, kfun, n, n1, SqrtKernel) vhre/h2 } #for(i in 1:30) #{ set.seed(64) x<-seq(0, 3.9,length=100) plot(x, NormHazard(x), type="l", xlab = "x", ylab="Hazard rate", xlim=c(0,4)) key(text=list(c("variable bandwidth", "plug-in estimate", "higher order", "ordinary", "True"), font =4), lines = list(lty=c(3,7,6, 8, 0)), corner=c(0,1), cex= 1.15) #cex specifies font size x1<- c(rnorm(ceiling(2/3 *400),3,1),rnorm(ceiling(1/3*400),2,.2)) cat("\n", quantile(x1, c(.9)), "\n") arg1<-Thre2(x1, x, gaussian1) arg2<-VarHre(x1, x, 0.15) horder<-Thre1(x1, x, HigherOrder) arg3<-Thre11(x1,x,gaussian1) arg1[c(90:100)]<-arg3[c(90:100)] lines(x, arg1, lty=3) # TRANSFORMED ESTIMATE HAS BEEN CREATED HERE lines(x, arg2, lty=7) #VARIABLE BANDWIDTH ESTIMATE HAS BEEN CREATED HERE lines(x, horder, lty=6) lines(x, arg3, lty=8) #}